In [ ]:
# Import packages, data
import pandas as pd
import altair as alt
alt.data_transformers.enable('default',max_rows=None)

# Import and clean shrimp
shrimp_df = pd.read_csv("../../data/modified/shrimp/seasons.csv")
shrimp_df['Date'] = pd.to_datetime(shrimp_df['Date'])

# Import and clean elevation
elevation_df = pd.read_csv("../../data/raw/USGS_Water_Levels/usgs_water_levels.tsv", sep='\t', skiprows=29).tail(-1)
elevation_df = elevation_df.rename(
    columns={
        "datetime": "Date",
        "178324_62614_00003": "water_elevation_ft",
        "178324_62614_00003_cd": "data_quality"
    }
)
elevation_df = elevation_df.dropna(subset=['water_elevation_ft'])
elevation_df = elevation_df.drop(columns=['agency_cd', 'site_no', 'data_quality'])
elevation_df['Date'] = pd.to_datetime(elevation_df['Date'])
elevation_df.head()
Out[ ]:
Date water_elevation_ft
1 1966-04-15 4194.90
17 1966-05-01 4194.90
31 1966-05-15 4194.90
48 1966-06-01 4194.90
62 1966-06-15 4194.60
In [ ]:
shrimp_df.head()
Out[ ]:
Date Cysts Nauplii Juveniles Males Total Females Females Females with Eggs/Naups Females with Cysts Season
0 2023-01-12 21.47 0.00 0.00 0.00 0.00 0.00 0.00 0.0 2022-23
1 2023-01-04 17.62 0.00 0.00 0.00 0.00 0.00 0.00 0.0 2022-23
2 2022-12-19 20.48 0.00 0.00 0.00 0.00 0.00 0.00 0.0 2022-23
3 2022-12-05 127.81 0.00 0.00 0.02 0.03 0.02 0.01 0.0 2022-23
4 2022-11-17 98.76 0.41 0.37 0.46 0.46 0.16 0.09 0.2 2022-23
In [ ]:
# Join datasets
df = elevation_df.join(shrimp_df.set_index("Date"), on="Date")

# Fill missing data in Season
df.loc[df['Date'] < min(shrimp_df['Date']), 'Season'] = "Not recorded"
df.loc[df['Date'] > max(shrimp_df['Date']), 'Season'] = "Not recorded"
df['Season'] = df['Season'].fillna(method="ffill")

df.head()
Out[ ]:
Date water_elevation_ft Cysts Nauplii Juveniles Males Total Females Females Females with Eggs/Naups Females with Cysts Season
1 1966-04-15 4194.90 NaN NaN NaN NaN NaN NaN NaN NaN Not recorded
17 1966-05-01 4194.90 NaN NaN NaN NaN NaN NaN NaN NaN Not recorded
31 1966-05-15 4194.90 NaN NaN NaN NaN NaN NaN NaN NaN Not recorded
48 1966-06-01 4194.90 NaN NaN NaN NaN NaN NaN NaN NaN Not recorded
62 1966-06-15 4194.60 NaN NaN NaN NaN NaN NaN NaN NaN Not recorded
In [ ]:
# Write to csv
df.to_csv("../../data/modified/shrimp/seasons_levels.csv")
In [ ]:
df_filtered = df[df['Season'] != "Not recorded"]

df_filtered = pd.melt(
    df_filtered, 
    id_vars=['Season', 'Date', 'water_elevation_ft'], 
    value_vars=[
       'Nauplii', 'Juveniles', 'Males',
       'Total Females', 'Females', 'Females with Eggs/Naups',
       'Females with Cysts'
    ], 
    var_name="brine_type",
    value_name="brine_count_per_liter"
).sort_values('Date')

df_filtered = df_filtered.reset_index(drop=True)

df_filtered.head()
Out[ ]:
Season Date water_elevation_ft brine_type brine_count_per_liter
0 2010-11 2010-08-31 4193.52 Nauplii 1.30
1 2010-11 2010-08-31 4193.52 Juveniles 0.16
2 2010-11 2010-08-31 4193.52 Females with Cysts 0.16
3 2010-11 2010-08-31 4193.52 Total Females 0.32
4 2010-11 2010-08-31 4193.52 Females 0.09
In [ ]:
# Elevation line plot
season_selection = alt.selection_single(fields=['Season'], name="Random")

color1 = alt.condition(season_selection,
                      alt.Color('Season:N', title="Harvest Season"),
                      alt.value('lightgray'))

elevation_line = (
    alt.Chart(df_filtered)
        .mark_line()
        .encode(
                x=alt.X('Date:T'),
                y=alt.Y(
                       'water_elevation_ft:Q',
                       scale=alt.Scale(domain=[df_filtered['water_elevation_ft'].min(), df_filtered['water_elevation_ft'].max()])
                ),
                color=color1
        )
).add_selection(season_selection)

elevation_line.title = "Great Salt Lake Water Elevation"
elevation_line.encoding.x.title = 'Date'
elevation_line.encoding.y.title = 'Water Elevation (ft)'

elevation_line
/Users/joshgladwell/opt/anaconda3/envs/anly503/lib/python3.10/site-packages/altair/utils/core.py:317: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
  for col_name, dtype in df.dtypes.iteritems():
Out[ ]:
In [ ]:
# Bar plot
# color = alt.condition(season_selection,
#                       alt.value('steelblue'),
#                       alt.value('lightgray'))

shrimp_bar = (
    alt.Chart(df_filtered)
    .mark_bar()
    .encode(
        y=alt.Y(
            'mean(brine_count_per_liter):Q', 
            scale=alt.Scale(domain=[0, 4])
        ),
        x=alt.X('brine_type:N'),
        color=alt.value('#265F58')

    )
).transform_filter(season_selection)

shrimp_bar.title ="Average Brine Shrimp Counts for Harvest"
shrimp_bar.encoding.x.title = 'Brine Shrimp'
shrimp_bar.encoding.y.title = 'Count (per liter of lakewater)'
(elevation_line | shrimp_bar).save("./shrimp_chart.html")
In [ ]:
elevation_line | shrimp_bar
Out[ ]: